{
  # this script evaluates and plots results
  
  # setup 
  # fractional rank: the rank of each element divided by the number of elements
  rank2 <- function(zer){
    nElements_ <- length(zer)
    rank(zer) / nElements_
  }
  
  # run loop over examples 
  for(exampleNum_i in unique( my_grid$exampleNum_i )){
    # run the example-data processing script for the current exampleNum_i
    # NOTE(review): presumably this defines x_red, y_red, z_red_human,
    # results_by_x and results_by_y, which are used further down -- confirm
    source("./Analysis/LinkOrgs_ProcessExampleData.R")

    # grid rows (algorithm / distance-measure settings) of the current example
    my_grid_ <- my_grid[my_grid$exampleNum_i == exampleNum_i, ]

    # one results file name per grid row; sprintf() is vectorized over the
    # columns, so all rows are formatted in a single call
    save_names_vec <- sprintf("./Results/z_LinkOrgs_Ex%s_%s_%s.csv",
                              my_grid_$exampleNum_i,
                              my_grid_$algorithm_i,
                              my_grid_$distanceMeasure_i)

    # subset data to where we have results
    # (the existence mask is computed once and applied to both the grid rows
    # and the file names so the two stay aligned)
    fileExists_ <- file.exists(save_names_vec)
    my_grid_ <- my_grid_[fileExists_, ]
    save_names_vec <- save_names_vec[ fileExists_ ]

    # no results on disk for this example: move on to the next example rather
    # than failing later on an empty grid
    if(nrow(my_grid_) == 0){
      warning(sprintf("No result files found for example %s; skipping.", exampleNum_i),
              call. = FALSE)
      next
    }

    # entry pretty keys
    # Short letter code for every row of my_grid_. Only "bipartite" and
    # "markov" depend on the distance measure; every other algorithm maps to
    # one code regardless of it. Unknown combinations raise an explicit error.
    save_names_letters <- local({
      lookup_ <- c("ml" = "ML",
                   "fuzzy" = "F",
                   "bipartite|ml" = "B-ML",
                   "bipartite|jaccard" = "B",
                   "markov|ml" = "M-ML",
                   "markov|jaccard" = "M",
                   "transfer" = "T",
                   "DeezyMatch" = "D",
                   "lookup" = "LU")
      algorithm_ <- as.character( my_grid_$algorithm_i )
      distanceMeasure_ <- as.character( my_grid_$distanceMeasure_i )

      # lookup key: "<algorithm>|<distance>" where the distance matters,
      # otherwise the algorithm name alone
      key_ <- ifelse(algorithm_ %in% c("bipartite", "markov"),
                     paste(algorithm_, distanceMeasure_, sep = "|"),
                     algorithm_)
      val_ <- unname( lookup_[key_] )
      if(anyNA(val_)){
        stop("No letter code defined for: ",
             paste(unique(key_[is.na(val_)]), collapse = ", "), call. = FALSE)
      }
      val_
    })

    # entry pretty names
    # Display name for every row of my_grid_. Only "bipartite" and "markov"
    # depend on the distance measure; every other algorithm maps to one name
    # regardless of it. Unknown combinations raise an explicit error.
    pretty_save_names_vec <- local({
      lookup_ <- c("ml" = "ML",
                   "fuzzy" = "Fuzzy",
                   "bipartite|ml" = "Bipartite-ML",
                   "markov|ml" = "Markov-ML",
                   "bipartite|jaccard" = "Bipartite",
                   "markov|jaccard" = "Markov",
                   "transfer" = "LLM-T",
                   "DeezyMatch" = "DeezyMatch",
                   "lookup" = "Lookup")
      algorithm_ <- as.character( my_grid_$algorithm_i )
      distanceMeasure_ <- as.character( my_grid_$distanceMeasure_i )

      # lookup key: "<algorithm>|<distance>" where the distance matters,
      # otherwise the algorithm name alone
      key_ <- ifelse(algorithm_ %in% c("bipartite", "markov"),
                     paste(algorithm_, distanceMeasure_, sep = "|"),
                     algorithm_)
      val_ <- unname( lookup_[key_] )
      if(anyNA(val_)){
        stop("No display name defined for: ",
             paste(unique(key_[is.na(val_)]), collapse = ", "), call. = FALSE)
      }
      val_
    })
     # plotting color index for every row of my_grid_; only "bipartite" and
     # "markov" depend on the distance measure. Unknown combinations raise an
     # explicit error.
     plot_col_vec <- local({
       lookup_ <- c("ml" = 10,
                    "bipartite|ml" = 8,
                    "markov|ml" = 8,
                    "bipartite|jaccard" = 10,
                    "markov|jaccard" = 10,
                    "fuzzy" = 1,
                    "transfer" = 1,
                    "DeezyMatch" = 1,
                    "lookup" = 1)
       # plot(1:25, pch = 19, col = 1:25)
       algorithm_ <- as.character( my_grid_$algorithm_i )
       distanceMeasure_ <- as.character( my_grid_$distanceMeasure_i )

       # lookup key: "<algorithm>|<distance>" where the distance matters,
       # otherwise the algorithm name alone
       key_ <- ifelse(algorithm_ %in% c("bipartite", "markov"),
                      paste(algorithm_, distanceMeasure_, sep = "|"),
                      algorithm_)
       val_ <- unname( lookup_[key_] )
       if(anyNA(val_)){
         stop("No plot color defined for: ",
              paste(unique(key_[is.na(val_)]), collapse = ", "), call. = FALSE)
       }
       val_
     })
     # plotting line type for every row of my_grid_; only "bipartite" and
     # "markov" depend on the distance measure. Unknown combinations raise an
     # explicit error.
     plot_lty_vec <- local({
       lookup_ <- c("ml" = 1,
                    "bipartite|ml" = 10,
                    "bipartite|jaccard" = 2,
                    "markov|ml" = 12,
                    "markov|jaccard" = 9,
                    "fuzzy" = 2,
                    "transfer" = 1,
                    "DeezyMatch" = 1,
                    "lookup" = 9)
       # for(i in 1:20){plot(1:20, type = "l", lty = i, main = i)}
       algorithm_ <- as.character( my_grid_$algorithm_i )
       distanceMeasure_ <- as.character( my_grid_$distanceMeasure_i )

       # lookup key: "<algorithm>|<distance>" where the distance matters,
       # otherwise the algorithm name alone
       key_ <- ifelse(algorithm_ %in% c("bipartite", "markov"),
                      paste(algorithm_, distanceMeasure_, sep = "|"),
                      algorithm_)
       val_ <- unname( lookup_[key_] )
       if(anyNA(val_)){
         stop("No line type defined for: ",
              paste(unique(key_[is.na(val_)]), collapse = ", "), call. = FALSE)
       }
       val_
     })
    # order everything by name
    # (the permutation is derived once from the pretty names and then applied
    # to each of the parallel vectors, the pretty names included)
    nameOrder_ <- order(pretty_save_names_vec)
    pretty_save_names_vec <- pretty_save_names_vec[ nameOrder_ ]
    save_names_vec <- save_names_vec[ nameOrder_ ]
    save_names_letters <- save_names_letters[ nameOrder_ ]
    plot_lty_vec <- plot_lty_vec[ nameOrder_ ]
    plot_col_vec <- plot_col_vec[ nameOrder_ ]

    # example 4 gets a special sequence
    if(exampleNum_i == 4){
      # both axis transforms for example 4 are the base-10 logarithm
      y_transformFxn <- function(ze){
        log(ze, base = 10)
      }
      x_transformFxn <- y_transformFxn
      # Regression summary for the example-4 substantive analysis.
      #
      # Regresses log(TotalContribAmount + 1) on log(MeanAssets + 1) after
      # adding every company of x_red that is absent from dat_ with a
      # contribution total of 0. Standard errors are HC0 robust.
      #
      # Arguments:
      #   dat_          matched data (coerced to a data.frame); needs the
      #                 columns CompanyName, TotalContribAmount and MeanAssets.
      #   COEF_TRUE     optional reference slope; when given, TRUTH_IN_SI is 1
      #                 if it lies within coef_obs +/- 1.96 * se_obs, else 0.
      #   COEF_TRUE_LB,
      #   COEF_TRUE_UB  optional reference interval; when COEF_TRUE_LB is given
      #                 a 100-draw bootstrap is run (both bounds are then used).
      #
      # Returns a list with r2 (adjusted R^2), tstat, coef_obs, se_obs,
      # TRUTH_IN_SI (NULL without COEF_TRUE) and propTruthInCI (NA without
      # COEF_TRUE_LB).
      #
      # Reads x_red and calls f2n(), both defined outside this function.
      Ex4StatFxn <- function(dat_, COEF_TRUE = NULL, COEF_TRUE_LB = NULL, COEF_TRUE_UB = NULL){
        library("lmtest"); library("sandwich")
        dat_ <- as.data.frame( dat_ )

        # companies of x_red without any row in dat_ are appended with a zero
        # total; the guard avoids 1:0 indexing when every company is present
        noContributions <- x_red$CompanyName[!x_red$CompanyName %in% dat_$CompanyName]
        if(length( noContributions ) > 0){
          # template rows with the columns of dat_, blanked out and then
          # filled from x_red for the columns the two tables share
          noContribDat <- dat_[seq_along( noContributions ),];  noContribDat[] <- NA
          sharedNames <- intersect(colnames(noContribDat),colnames(  x_red)  )
          noContribDat[,sharedNames] <- x_red[x_red$CompanyName %in% noContributions,sharedNames]
          noContribDat$TotalContribAmount <- 0
          dat_ <- rbind(dat_,noContribDat)
        }

        # slope, robust t statistic / standard error, adjusted R^2
        lm_ <- lm( log(TotalContribAmount+1) ~ log(MeanAssets+1), data = dat_)
        coef_obs <- as.vector( lm_$coefficients[2] )
        coef_mat_ <- coeftest(lm_, vcov = vcovHC(lm_, type = "HC0"))
        tstat_ <- as.vector(f2n( coef_mat_[2,3] ))
        se_obs <- as.vector( coef_mat_[2,2] )
        r2_ <- as.vector( summary( lm_  )$adj.r.squared)

        # truth in raw SE
        TRUTH_IN_SI <- NULL
        if(!is.null(COEF_TRUE)){
          TRUTH_IN_SI <- 1* c(coef_obs - 1.96*se_obs <= COEF_TRUE & coef_obs + 1.96*se_obs >= COEF_TRUE)
        }

        # simulate power
        propTruthInCI <- NA
        if(!is.null(COEF_TRUE_LB)){
          dat_$log_TotalContribAmount <- log(dat_$TotalContribAmount+1)
          dat_$log_MeanAssets <- log(dat_$MeanAssets+1)
          propTruthInCI <-  mean( replicate(100,{

            # bootstrap draw (with replacement )
            dat__ <- dat_[sample(1:nrow(dat_),nrow(dat_),replace=TRUE),]

            # lm test
            lm__ <- lm( log_TotalContribAmount ~ log_MeanAssets, data = dat__)
            coef_test_ <- coeftest(lm__, vcov = vcovHC(lm__, type = "HC0"))[2,1:2]
            tmp1_ <- c(coef_test_[1] - 1.96*coef_test_[2], coef_test_[1] + 1.96*coef_test_[2])
            tmp2_ <- c(COEF_TRUE_LB, COEF_TRUE_UB)

            # NOTE(review): TRUE only when an endpoint of the bootstrap interval
            # falls inside [COEF_TRUE_LB, COEF_TRUE_UB]; a bootstrap interval
            # that strictly contains the reference interval counts as FALSE --
            # confirm that this is intended
            any(tmp1_ >= tmp2_[1] & tmp1_ <= tmp2_[2])
          }) )
        }

        return( list("r2"=r2_, "tstat"=tstat_,
                     "coef_obs" = coef_obs, "se_obs" = se_obs,
                     "TRUTH_IN_SI" = as.vector( TRUTH_IN_SI ),
                     "propTruthInCI" = as.vector( propTruthInCI ) )  )
      }
      # reference fit on z_red_human; its slope and the +/- 1.96 SE bounds
      # around it are stored for later comparisons
      stat_results_true <- Ex4StatFxn(z_red_human)
      COEF_TRUE <- as.vector( stat_results_true$coef_obs )
      COEF_TRUE_LB <- as.vector( stat_results_true$coef_obs - 1.96*stat_results_true$se_obs )
      COEF_TRUE_UB <- as.vector( stat_results_true$coef_obs + 1.96*stat_results_true$se_obs )
    }

    # obtaining accuracy data and plotting results iterating over distance threshold values
    # number of distance thresholds evaluated per results file
    nCuts_MASTER <- 50

    # accumulators, filled once per results file: method tags, execution
    # times, and the column-bound results (one row per threshold)
    execution_time <- c()
    save_name_sub <- c()
    RX_final_master <- matrix(ncol = 0, nrow = nCuts_MASTER)
    # Evaluate every results file of this example over a grid of distance
    # thresholds. Each file adds nCuts_MASTER rows of performance measures
    # (columns prefixed with the method tag) to RX_final_master.
    for(save_name_i in save_names_vec){
      LinkOrgs::print2(paste0("Evaluating ", save_name_i))

      # read in data
      z_red_i <- data.table::fread( save_name_i )

      # first ExecutionTime entry, or NA when the file has no such column
      execution_time_ <- z_red_i$ExecutionTime[1]
      if(is.null(execution_time_)){execution_time_ <- NA}
      execution_time <- c(execution_time, execution_time_)

      # method tag: the file name without its first three "_"-separated
      # pieces and without the ".csv" extension
      save_name_i_sub <- gsub(paste(strsplit(save_name_i,split="_")[[1]][-c(1:3)],collapse = "_"),
                              pattern="\\.csv",replacement="")
      save_name_sub <- c(save_name_sub, save_name_i_sub )

      # fractional ranks of the match distances; they do not change across
      # thresholds, so they are computed once per file
      minDistRank_i <- rank2(z_red_i$minDist)

      # iterate over quantiles
      # (probabilities are raised to the 5th power, which places most
      # thresholds at the low-distance end)
      dist_seq_full <- quantile(minDistRank_i, probs = rank2(seq_len(nCuts_MASTER))^5)
      RX_final <- c(); for(dist_ in dist_seq_full){
        LinkOrgs::print2(  sprintf("%s: %.5f%%", save_name_i, 100*dist_   ) )

        # matches whose distance rank is at or below the current threshold
        z_red_i_subset <- as.data.frame( z_red_i[minDistRank_i <= dist_,] )

        # default (all NA) when no match survives the threshold
        Results_ <- c("TruePositives"=NA,"FalsePositives"=NA,"FalseNegatives"=NA,
                      "TrueNegatives"=NA, "MatchedDatasetSize"=NA)
        if(  nrow(z_red_i_subset) > 0  ){
          Results_ <- AssessMatchPerformance(x = x_red, by.x = results_by_x,
                                             y = y_red, by.y = results_by_y,
                                             z = z_red_i_subset,
                                             z_true = z_red_human,
                                             openBrowser = FALSE)
          # View(z_red_i_subset[order(z_red_i_subset$minDist),c(results_by_x,results_by_y,"minDist")])

          # substantive analysis
          if(exampleNum_i == 4){
            Results_["RegR2"] <- Results_["RegCoef"] <- Results_["RegSE"] <- NA
            if(Results_["MatchedDatasetSize"] != 0){
              Results_["RegR2"] <- (reg_results <- Ex4StatFxn(z_red_i_subset))$r2
              Results_["RegCoef"] <- reg_results$coef_obs
              Results_["RegSE"] <- reg_results$se_obs
            }
          }
        }

        # main performance evaluation
        # (measure names get the method tag as prefix; the threshold itself is
        # stored unprefixed as "maxDist")
        names(Results_) <- paste(save_name_i_sub, names(Results_), sep='_')
        Results_ <- c(Results_, "maxDist"  = dist_)
        RX_final <- as.data.frame(rbind(RX_final, Results_))

        # check that inequalities hold
        #mean( RX_final$FuzzyTruePositives <= RX_final$LinkOrgs_bipartiteTruePositives )
      }

      # append results across task
      RX_final_master <- cbind(RX_final_master, RX_final)
    }
    # "maxDist" is appended once per results file, so only the first copy of
    # any repeated column name is kept. drop = FALSE preserves the table shape
    # (and the column name) even if a single column survives.
    RX_final_master <- as.data.frame(
      RX_final_master[, !duplicated(colnames(RX_final_master)), drop = FALSE])

    ## accuracy analysis plots
    source("./Analysis/LinkOrgs_AccuracyPlots.R")
  }
}
